Importing Libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(ggplot2) 
library(ggrepel)
library(knitr)
library(chron)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.4-2, (SVN revision 814)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/Ron III/Documents/R/win-library/3.5/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/Ron III/Documents/R/win-library/3.5/rgdal/proj
##  Linking to sp version: 1.3-1
library(rgeos)
## rgeos version: 0.4-2, (SVN revision 581)
##  GEOS runtime version: 3.6.1-CAPI-1.10.1 
##  Linking to sp version: 1.3-1 
##  Polygon checking: TRUE
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
library(sp)
library(raster)
## 
## Attaching package: 'raster'
## The following objects are masked from 'package:chron':
## 
##     origin, origin<-
## The following object is masked from 'package:dplyr':
## 
##     select
library(RCurl)
## Loading required package: bitops
library(shapefiles)
## Loading required package: foreign
## 
## Attaching package: 'shapefiles'
## The following objects are masked from 'package:foreign':
## 
##     read.dbf, write.dbf
library(maptools)
## Checking rgeos availability: TRUE
library(spatstat)
## Loading required package: spatstat.data
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:raster':
## 
##     getData
## The following object is masked from 'package:dplyr':
## 
##     collapse
## Loading required package: rpart
## 
## spatstat 1.58-2       (nickname: 'Not Even Wrong') 
## For an introduction to spatstat, type 'beginner'
## 
## Attaching package: 'spatstat'
## The following objects are masked from 'package:raster':
## 
##     area, rotate, shift
## The following object is masked from 'package:scales':
## 
##     rescale
library(xts) 
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(highcharter)
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use

Reading in Data

# Download the first 5000 rows of the City of Chicago "Crimes - 2001 to Present"
# open-data extract. NOTE(review): read.csv on a URL re-downloads on every run;
# nrows = 5000 limits the sample, so counts below reflect this subset only.
crime.data <- read.csv("https://data.cityofchicago.org/api/views/ijzp-q8t2/rows.csv?accessType=DOWNLOAD",nrows = 5000)

Cleaning of data

cleaned.crime.data <- subset(crime.data, !duplicated(crime.data$Case.Number)) # Remove duplicate case numbers
#cleaned.crime.data <- cleaned.crime.data %>% filter(Arrest=="true") #Limiting to only arrests 
# NOTE(review): the format string below begins with a space, and the raw portal data
# reports 12-hour clock times with AM/PM ("%I:%M:%S %p"), so "%H:%M" may mis-parse
# afternoon times -- confirm against the raw Date column.
cleaned.crime.data$Date <- as.POSIXct(cleaned.crime.data$Date,format=" %m/%d/%Y %H:%M") # parse to POSIXct
cleaned.crime.data$Time <- times(format(cleaned.crime.data$Date," %H:%M:%S")) # extract a chron "times" column

# Bucket each incident into a 6-hour window to aid time-of-day / seasonal analysis
Time.tag <- chron(times= c("00:00:00", "06:00:00", "12:00:00", "18:00:00","23:59:00"))
cleaned.crime.data$Time.tag <- cut(cleaned.crime.data$Time, breaks=Time.tag,labels=c("00-06","06-12", "12-18", "18-00"), include.lowest=TRUE)
# NOTE(review): strptime() coerces the POSIXct back to character ("YYYY-MM-DD HH:MM:SS",
# no leading space), so the leading space in " %Y-%m-%d" looks accidental -- verify it
# does not silently produce NA dates.
cleaned.crime.data$Date <- as.POSIXct(strptime(cleaned.crime.data$Date,format= " %Y-%m-%d"))# strip the time-of-day, keep date only

# Remove rows with missing coordinates (Longitude/Latitude) or missing Date.
# Fix: the original comment promised Longitude was checked, but only Latitude was;
# Longitude is now tested as well so no half-coordinate rows survive.
cleaned.crime.data <- subset(cleaned.crime.data,
                             !is.na(Latitude) & !is.na(Longitude))
cleaned.crime.data <- subset(cleaned.crime.data, !is.na(Date))

Reducing the number of crime types/descriptions

Number of different types of crime

length(table(cleaned.crime.data$Primary.Type)) # 26 distinct types in this 5000-row sample (comment previously said 30)
## [1] 26
(table(cleaned.crime.data$Primary.Type)) # occurrences of each type
## 
##                             ARSON                           ASSAULT 
##                                11                               386 
##                           BATTERY                          BURGLARY 
##                               984                               246 
## CONCEALED CARRY LICENSE VIOLATION               CRIM SEXUAL ASSAULT 
##                                 3                                26 
##                   CRIMINAL DAMAGE                 CRIMINAL TRESPASS 
##                               565                               115 
##                DECEPTIVE PRACTICE                          HOMICIDE 
##                               271                                 5 
##                 HUMAN TRAFFICKING  INTERFERENCE WITH PUBLIC OFFICER 
##                                 1                                23 
##                      INTIMIDATION                        KIDNAPPING 
##                                 3                                 2 
##               MOTOR VEHICLE THEFT                         NARCOTICS 
##                               191                               257 
##                         OBSCENITY        OFFENSE INVOLVING CHILDREN 
##                                 4                                52 
##                     OTHER OFFENSE                      PROSTITUTION 
##                               281                                17 
##            PUBLIC PEACE VIOLATION                           ROBBERY 
##                                24                               224 
##                       SEX OFFENSE                          STALKING 
##                                18                                 3 
##                             THEFT                 WEAPONS VIOLATION 
##                              1125                               118

Creating classifications for crime types

# Collapse the raw Primary.Type values into a smaller set of crime categories.
# Fix: the original chain of ifelse() calls contained duplicated entries
# ("OTHER NARCOTIC VIOLATION" listed twice, "OTHER OFFENSE" listed twice) and
# mixed == with %in%; a single lookup list removes the repetition. The groups
# are disjoint, so application order does not affect the result.
cleaned.crime.data$crime <- as.character(cleaned.crime.data$Primary.Type)

# Target category -> the Primary.Type values it absorbs
crime.groups <- list(
  "SEXUAL"        = c("CRIM SEXUAL ASSAULT", "PROSTITUTION", "SEX OFFENSE"),
  "VEHICLE"       = "MOTOR VEHICLE THEFT",
  "NON-VIOLATION" = c("GAMBLING", "INTERFERE WITH PUBLIC OFFICER",
                      "INTERFERENCE WITH PUBLIC OFFICER", "INTIMIDATION",
                      "LIQUOR LAW VIOLATION", "OBSCENITY", "NON-CRIMINAL",
                      "PUBLIC PEACE VIOLATION", "PUBLIC INDECENCY",
                      "STALKING", "NON-CRIMINAL (SUBJECT SPECIFIED)"),
  "DAMAGE"        = "CRIMINAL DAMAGE",
  "TRESPASS"      = "CRIMINAL TRESPASS",
  "DRUG"          = c("NARCOTICS", "OTHER NARCOTIC VIOLATION"),
  "FRAUD"         = "DECEPTIVE PRACTICE",
  "OTHER"         = "OTHER OFFENSE",
  "VIOLATION"     = c("KIDNAPPING", "WEAPONS VIOLATION",
                      "OFFENSE INVOLVING CHILDREN")
)

# Recode in place; types not listed above keep their original name
for (grp in names(crime.groups)) {
  cleaned.crime.data$crime[cleaned.crime.data$crime %in% crime.groups[[grp]]] <- grp
}

Data Exploration

Aggregated Data Presenting Top 6 Types of Crimes and Relative Distribution

# Work on a plain data.frame copy for the summaries below
huh <- as.data.frame(cleaned.crime.data)

# Daily incident counts, ordered chronologically
df_crime_daily <- huh %>%
  group_by(Date) %>%
  summarize(count = n()) %>%
  arrange(Date)

# Frequency table of primary crime types, most common first;
# keep only the types that occur more than once
type.counts <- sort(table(huh$Primary.Type), decreasing = TRUE)
df_category <- data.frame(type.counts[type.counts > 1])
colnames(df_category) <- c("Category", "Frequency")
df_category$Percentage <- df_category$Frequency / sum(df_category$Frequency)

# Preview the top six categories
x <- head(df_category)
x
##             Category Frequency Percentage
## 1              THEFT      1125 0.22708922
## 2            BATTERY       984 0.19862737
## 3    CRIMINAL DAMAGE       565 0.11404925
## 4            ASSAULT       386 0.07791683
## 5      OTHER OFFENSE       281 0.05672184
## 6 DECEPTIVE PRACTICE       271 0.05470327

Map showing crime in Chicago

library(leaflet)
## 
## Attaching package: 'leaflet'
## The following object is masked from 'package:xts':
## 
##     addLegend
data <- huh
# HTML popup shown when a map marker is clicked.
# Fix: the Year field was mislabeled "<b>Time: </b>", duplicating the label of
# the preceding Time field; it is now labeled "Year".
data$popup <- paste("<b>Incident #: </b>", data$Case.Number, "<br>",
                    "<b>Category: </b>", data$Primary.Type, "<br>",
                    "<b>Description: </b>", data$Description, "<br>",
                    "<b>Location Description: </b>", data$Location.Description, "<br>",
                    "<b>Community Area: </b>", data$Community.Area, "<br>",
                    "<b>Time: </b>", data$Time, "<br>",
                    "<b>Year: </b>", data$Year, "<br>",
                    "<b>Crime Type: </b>", data$crime, "<br>",
                    "<b>Longitude: </b>", data$Longitude, "<br>",
                    "<b>Latitude: </b>", data$Latitude)

# Interactive cluster map of incidents with switchable base layers.
# Fix: addTiles() was called twice (once ungrouped, once as the "OSM (default)"
# group), which stacked a duplicate base tile layer that the layers control
# could not toggle off; the ungrouped call is removed.
leaflet(data, width = "100%") %>%
  addTiles(group = "OSM (default)") %>%
  addProviderTiles(provider = "Esri.WorldStreetMap", group = "World StreetMap") %>%
  addProviderTiles(provider = "Esri.WorldImagery", group = "World Imagery") %>%
  # addProviderTiles(provider = "NASAGIBS.ViirsEarthAtNight2012",group = "Nighttime Imagery") %>%
  addMarkers(lng = ~Longitude, lat = ~Latitude, popup = data$popup,
             clusterOptions = markerClusterOptions()) %>%
  addLayersControl(
    baseGroups = c("OSM (default)", "World StreetMap", "World Imagery"),
    options = layersControlOptions(collapsed = FALSE)
  )

# Visualization of Crimes and Arrest Relationship

# Reclassification is finished; store the category as a factor
cleaned.crime.data$crime <- as.factor(cleaned.crime.data$crime)

# Daily totals of all recorded crimes, as an xts series for charting
by_Date <- cleaned.crime.data %>% group_by(Date) %>% summarise(Total = n())
tseries <- xts(by_Date$Total, order.by=as.POSIXct(by_Date$Date))

#Arrests_by_Date$Date[!(Arrests_by_Date$Date %in% by_Date$Date)]
## Daily totals restricted to incidents that led to an arrest (Arrest == 'true')
Arrests_by_Date <- (cleaned.crime.data[cleaned.crime.data$Arrest == 'true',]) %>% group_by(Date) %>% summarise(Total = n())
arrests_tseries <- xts(Arrests_by_Date$Total, order.by=as.POSIXct(Arrests_by_Date$Date))


# Overlay the two daily series (all crimes vs arrests) in one highcharter plot
hchart(tseries, name = "crime") %>% 
 hc_add_series(arrests_tseries, name = "Arrest") %>%
 hc_add_theme(hc_theme_monokai()) %>%
 hc_credits(enabled = TRUE, text = "Sources: City of Chicago Administration and the Chicago Police Department", style = list(fontSize = "12px")) %>%
 hc_title(text = "Trend of Chicago Crimes and Arrests") %>%
 hc_legend(enabled = TRUE)
# %-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0
# %-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0
# %-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0
# %-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0
# %-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0
# %-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0-0

Cluster Identification Methods

Within this section we will be implementing numerous techniques to check for any form of clustering within our data points. Here we will be utilizing point pattern analysis as a metric of finding clustering, and we will be utilizing the Kolmogorov-Smirnov test as another method of determining if there are clusters.

Beginning Point Pattern Analysis

# Promote the cleaned data to a SpatialPointsDataFrame in WGS84 lon/lat
coords <- SpatialPoints(cleaned.crime.data[,c("Longitude","Latitude")])
crime_spatial_df <-SpatialPointsDataFrame(coords,cleaned.crime.data)
# Declare the coordinate reference system (geographic lon/lat, WGS84 ellipsoid)
proj4string(crime_spatial_df) <- CRS("+proj=longlat +ellps=WGS84")
str(crime_spatial_df)
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  4955 obs. of  25 variables:
##   .. ..$ ID                  : int [1:4955] 11544274 11544276 11544292 11544298 11544300 11544303 11544313 11544314 11544317 11544319 ...
##   .. ..$ Case.Number         : Factor w/ 5000 levels "HV507793","HX173201",..: 246 251 262 255 254 249 256 266 275 259 ...
##   .. ..$ Date                : POSIXct[1:4955], format: "2018-12-23" ...
##   .. ..$ Block               : Factor w/ 3832 levels "0000X E 100TH ST",..: 2316 180 2875 595 1793 827 1409 1044 78 2155 ...
##   .. ..$ IUCR                : Factor w/ 180 levels "0110","0265",..: 89 27 175 91 9 103 23 33 61 33 ...
##   .. ..$ Primary.Type        : Factor w/ 26 levels "ARSON","ASSAULT",..: 7 3 19 7 22 26 3 3 25 3 ...
##   .. ..$ Description         : Factor w/ 169 levels "$500 AND UNDER",..: 59 138 164 152 27 130 21 65 78 65 ...
##   .. ..$ Location.Description: Factor w/ 88 levels "","ABANDONED BUILDING",..: 79 76 79 79 76 79 60 12 18 12 ...
##   .. ..$ Arrest              : Factor w/ 2 levels "false","true": 1 2 2 1 1 2 2 1 1 2 ...
##   .. ..$ Domestic            : Factor w/ 2 levels "false","true": 1 1 1 2 1 1 1 2 1 2 ...
##   .. ..$ Beat                : int [1:4955] 1713 1831 823 1232 1921 2534 421 2534 1824 1011 ...
##   .. ..$ District            : int [1:4955] 17 18 8 12 19 25 4 25 18 10 ...
##   .. ..$ Ward                : int [1:4955] 33 42 23 11 32 37 7 26 2 24 ...
##   .. ..$ Community.Area      : int [1:4955] 14 8 65 28 5 23 43 23 8 29 ...
##   .. ..$ FBI.Code            : Factor w/ 21 levels "01A","02","03",..: 14 10 21 14 3 15 5 10 7 10 ...
##   .. ..$ X.Coordinate        : int [1:4955] 1152858 1175301 1150780 1170909 1160776 1148138 1193717 1147806 1175906 1148136 ...
##   .. ..$ Y.Coordinate        : int [1:4955] 1931371 1903292 1862712 1895812 1922170 1908473 1855704 1910461 1908369 1893033 ...
##   .. ..$ Year                : int [1:4955] 2018 2018 2018 2018 2018 2018 2018 2018 2018 2018 ...
##   .. ..$ Updated.On          : Factor w/ 47 levels "01/01/2019 04:05:25 PM",..: 45 45 45 45 45 45 45 45 45 45 ...
##   .. ..$ Latitude            : num [1:4955] 42 41.9 41.8 41.9 41.9 ...
##   .. ..$ Longitude           : num [1:4955] -87.7 -87.6 -87.7 -87.6 -87.7 ...
##   .. ..$ Location            : Factor w/ 4447 levels "","(41.647039035, -87.616097797)",..: 4120 2923 1408 2367 3870 3302 933 3425 3285 2254 ...
##   .. ..$ Time                : 'times' num [1:4955] 12:10:00 01:35:00 03:00:00 01:53:00 01:45:00 ...
##   .. .. ..- attr(*, "format")= chr "h:m:s"
##   .. ..$ Time.tag            : Factor w/ 4 levels "00-06","06-12",..: 3 1 1 1 1 3 1 1 1 1 ...
##   .. ..$ crime               : Factor w/ 18 levels "ARSON","ASSAULT",..: 6 3 12 6 13 18 3 3 15 3 ...
##   ..@ coords.nrs : num(0) 
##   ..@ coords     : num [1:4955, 1:2] -87.7 -87.6 -87.7 -87.6 -87.7 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:4955] "1" "2" "3" "4" ...
##   .. .. ..$ : chr [1:2] "Longitude" "Latitude"
##   ..@ bbox       : num [1:2, 1:2] -87.9 41.6 -87.5 42
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:2] "Longitude" "Latitude"
##   .. .. ..$ : chr [1:2] "min" "max"
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
##   .. .. ..@ projargs: chr "+proj=longlat +ellps=WGS84"

By utilizing SpatialPoints we obtain an object with 5 slots/components: data (the original data read into R), coords.nrs (the data type of the coordinates), coords (the coordinates themselves), bbox (the bounding box of the coordinates), and proj4string (the Coordinate Reference System, which places our data in longitude and latitude format).

# Persist the spatial data frame for reuse in later sessions
saveRDS(crime_spatial_df, "C:/Users/Ron III/Documents/GitHub/Chicago_Crime_Evolution_Analysis/crime_spatial_df.rds")
# NOTE(review): the zip is extracted to ...\Documents\Crime_Research\shapefiles_1, but
# readOGR() below reads from ...\GitHub\Chicago_Crime_Evolution_Analysis\shapefiles_1 --
# two different directories. The knitted output also shows "error 1 in extracting from
# zip file", so the read presumably succeeds only because of a previous manual
# extraction. Verify and unify these hard-coded absolute paths.
unzip("C:/Users/Ron III/Documents/GitHub/Chicago_Crime_Evolution_Analysis/Data_Sets_Files/Boundaries - ZIP Codes.zip", exdir = "C:/Users/Ron III/Documents/Crime_Research/shapefiles_1", overwrite = TRUE)
## Warning in unzip("C:/Users/Ron III/Documents/GitHub/
## Chicago_Crime_Evolution_Analysis/Data_Sets_Files/Boundaries - ZIP
## Codes.zip", : error 1 in extracting from zip file
illinois_shp <- readOGR(dsn = "C:/Users/Ron III/Documents/GitHub/Chicago_Crime_Evolution_Analysis/shapefiles_1",layer = "geo_export_6c977322-0a01-4386-849c-278ec22209fe" )
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Ron III\Documents\GitHub\Chicago_Crime_Evolution_Analysis\shapefiles_1", layer: "geo_export_6c977322-0a01-4386-849c-278ec22209fe"
## with 61 features
## It has 4 fields
# 
# plot(crime_spatial_df,pch="+",cex=0.5,main="",col=crime_spatial_df$Primary.Type)
# plot(illinois_shp,add=T)
# legend(x=-0.53,y=51.41,pch="+",col=unique(crime_spatial_df$Primary.Type),legend=unique(crime_spatial_df$))
# Build a marked planar point pattern (ppp) from the crime coordinates,
# using the primary crime type as the mark on each point. The observation
# window is the bounding box of the observed coordinates.
lon <- cleaned.crime.data$Longitude
lat <- cleaned.crime.data$Latitude
xrange <- range(lon, na.rm = TRUE)
yrange <- range(lat, na.rm = TRUE)
crime_ppp <- ppp(lon, lat, xrange, yrange,
                 marks = as.factor(cleaned.crime.data$Primary.Type))
## Warning: data contain duplicated points
plot(crime_ppp, main = "Our Crime PPP Object")

Creating Quadrats and plot intensity using Spatstat

# Count points in a 7 x 3 quadrat grid and overlay the counts on the pattern
q <- quadratcount(crime_ppp, nx = 7, ny = 3)
plot(crime_ppp, cex = 0.5, pch = "+")
plot(q, add = TRUE, cex = 2, main = "Groovy quadrat plot")

Representing crime density in Chicago

# Kernel-smoothed intensity surface of all crimes across the window
ds <- density(crime_ppp)
plot(ds, main = "Crime density")

Kolmogorov-Smirnov test of CSR

Here we are testing assumptions of our spatial distribution of our data point as being complete spatial randomness (CSR). The purpose of this is to find if our data points are clustered in space.

# Chi-squared quadrat test of Complete Spatial Randomness on a 10 x 15 grid;
# a tiny p-value rejects CSR (the points are not uniformly random).
quadrat.test(crime_ppp, nx = 10, ny = 15)
## 
##  Chi-squared test of CSR using quadrat counts
##  Pearson X2 statistic
## 
## data:  crime_ppp
## X2 = 12707, df = 149, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 
## Quadrats: 10 by 15 grid of tiles
# Kolmogorov-Smirnov CDF test of CSR using the x coordinate as covariate
ks <- cdf.test(crime_ppp, "x")
## Warning in ks.test(U, "punif", ...): ties should not be present for the
## Kolmogorov-Smirnov test
plot(ks)

pval <- ks$p.value
pval
## [1] 0
# Repeat the CDF test using the fitted density surface itself as the covariate
ds <- density(crime_ppp)
k <- cdf.test(crime_ppp, ds)
plot(k)

Based on our Kolmogorov-Smirnov test of CSR, we have found that our crime data was not drawn from a population satisfying complete spatial randomness. This is shown in the plot above: if our data did follow the assumptions of CSR, the observed and theoretical cumulative distributions would not diverge as they do.

G function: Distance to the nearest event

# G function: empirical distribution of nearest-neighbour distances.
# Values above the theoretical Poisson curve indicate clustering.
gtest <- Gest(crime_ppp)
gtest
## Function value object (class 'fv')
## for the function r -> G(r)
## .....................................................................
##         Math.label      Description                                  
## r       r               distance argument r                          
## theo    G[pois](r)      theoretical Poisson G(r)                     
## han     hat(G)[han](r)  Hanisch estimate of G(r)                     
## rs      hat(G)[bord](r) border corrected estimate of G(r)            
## km      hat(G)[km](r)   Kaplan-Meier estimate of G(r)                
## hazard  hat(h)[km](r)   Kaplan-Meier estimate of hazard function h(r)
## theohaz h[pois](r)      theoretical Poisson hazard function h(r)     
## .....................................................................
## Default plot formula:  .~r
## where "." stands for 'km', 'rs', 'han', 'theo'
## Recommended range of argument r: [0, 0.0027595]
## Available range of argument r: [0, 0.010389]
plot(gtest)

F function: Distance from a point to the nearest event

# F function (empty-space function): distance from arbitrary locations to the
# nearest event. Values below the Poisson curve indicate clustering.
ftest <- Fest(crime_ppp)
ftest
## Function value object (class 'fv')
## for the function r -> F(r)
## .....................................................................
##         Math.label      Description                                  
## r       r               distance argument r                          
## theo    F[pois](r)      theoretical Poisson F(r)                     
## cs      hat(F)[cs](r)   Chiu-Stoyan estimate of F(r)                 
## rs      hat(F)[bord](r) border corrected estimate of F(r)            
## km      hat(F)[km](r)   Kaplan-Meier estimate of F(r)                
## hazard  hat(h)[km](r)   Kaplan-Meier estimate of hazard function h(r)
## theohaz h[pois](r)      theoretical Poisson hazard h(r)              
## .....................................................................
## Default plot formula:  .~r
## where "." stands for 'km', 'rs', 'cs', 'theo'
## Recommended range of argument r: [0, 0.011]
## Available range of argument r: [0, 0.011]
plot(ftest)

K function: Points within a certain distance of a point

# Ripley's K function: expected number of further events within distance r
# of a typical event; values above the Poisson curve indicate clustering.
ktest <- Kest(crime_ppp)
## number of data points exceeds 3000 - computing border correction estimate only
ktest
## Function value object (class 'fv')
## for the function r -> K(r)
## ........................................................
##        Math.label      Description                      
## r      r               distance argument r              
## theo   K[pois](r)      theoretical Poisson K(r)         
## border hat(K)[bord](r) border-corrected estimate of K(r)
## ........................................................
## Default plot formula:  .~r
## where "." stands for 'border', 'theo'
## Recommended range of argument r: [0, 0.093871]
## Available range of argument r: [0, 0.093871]
plot(ktest)

# First plot the points.
# Fix: the title "Blue Plaques in Harrow" was left over from the tutorial this
# section was adapted from; relabel for the Chicago crime data.
plot(crime_ppp, pch = 16, cex = 0.5, main = "Chicago Crimes")
# Now count the points that fall in a 6 x 6 grid overlaid across the window
plot(quadratcount(crime_ppp, nx = 6, ny = 6), add = TRUE, col = "red")

# Per-quadrat counts on the 6 x 6 grid, as a data frame
Qcount<-data.frame(quadratcount(crime_ppp, nx = 6, ny = 6))
# Tabulate how many quadrats contain each observed count (frequency of frequencies)
QCountTable <- data.frame(table(Qcount$Freq, exclude=NULL))
#view the data frame
QCountTable
##    Var1 Freq
## 1     0   10
## 2     2    1
## 3     3    1
## 4     6    1
## 5    12    1
## 6    20    1
## 7    22    1
## 8    28    1
## 9    38    1
## 10   39    1
## 11   44    1
## 12   60    1
## 13   86    1
## 14   93    1
## 15  205    1
## 16  222    1
## 17  270    1
## 18  278    1
## 19  294    1
## 20  330    1
## 21  340    1
## 22  345    1
## 23  356    1
## 24  439    1
## 25  444    1
## 26  485    1
## 27  494    1
# We don't need the last row (the NA/exclude bucket), so remove it
QCountTable <- QCountTable[-nrow(QCountTable), ]
# The first column came from table(), so it is a factor of the observed counts
class(QCountTable[, 1])
## [1] "factor"
# BUG FIX: as.numeric() on a factor returns the internal level codes (1, 2, 3, ...),
# NOT the original count values (0, 2, 3, 6, 12, ...). The knitted table below shows
# Var1 as consecutive 1..26, confirming the bug; convert via as.character() first so
# lambda and the expected counts use the real quadrat counts.
QCountTable[, 1] <- as.numeric(as.character(QCountTable[, 1]))
# Total points contributed by each count value (count * number of quadrats)
QCountTable$total <- QCountTable[, 1] * QCountTable[, 2]
# Column sums: total number of quadrats (Freq) and total number of points (total)
sums <- colSums(QCountTable[, -1])
sums
# NOTE(review): the printed sums and table below were produced BEFORE this fix
# and are stale; re-knit to refresh them.
# Mean Poisson parameter (lambda) = total points / total quadrats
lambda <- sums[2] / sums[1]
# Poisson probability of observing count k in a quadrat: lambda^k * e^-lambda / k!
QCountTable$Pr <- ((lambda^QCountTable[, 1]) * exp(-lambda)) / factorial(QCountTable[, 1])
# Expected number of quadrats showing each count under CSR
QCountTable$Expected <- round(QCountTable$Pr * sums[1], 0)
QCountTable
##    Var1 Freq total           Pr Expected
## 1     1   10    10 3.509179e-04        0
## 2     2    1     2 1.804721e-03        0
## 3     3    1     3 6.187613e-03        0
## 4     4    1     4 1.591101e-02        1
## 5     5    1     5 3.273121e-02        1
## 6     6    1     6 5.611065e-02        2
## 7     7    1     7 8.244830e-02        3
## 8     8    1     8 1.060050e-01        4
## 9     9    1     9 1.211485e-01        4
## 10   10    1    10 1.246099e-01        4
## 11   11    1    11 1.165184e-01        4
## 12   12    1    12 9.987288e-02        3
## 13   13    1    13 7.902030e-02        3
## 14   14    1    14 5.805573e-02        2
## 15   15    1    15 3.980964e-02        1
## 16   16    1    16 2.559191e-02        1
## 17   17    1    17 1.548418e-02        1
## 18   18    1    18 8.848104e-03        0
## 19   19    1    19 4.789951e-03        0
## 20   20    1    20 2.463404e-03        0
## 21   21    1    21 1.206565e-03        0
## 22   22    1    22 5.641083e-04        0
## 23   23    1    23 2.522720e-04        0
## 24   24    1    24 1.081166e-04        0
## 25   25    1    25 4.448225e-05        0
## 26   26    1    26 1.759737e-05        0
# Compare the frequency distributions of the observed and expected point patterns.
# Fix: axis labels referred to "Blue Plaques" (tutorial leftover) and misspelled
# "Occurrences"; relabeled for the crime data.
# NOTE(review): the plotting window c(1,5) x c(0,14) clips much of the data --
# confirm the intended axis limits.
plot(c(1, 5), c(0, 14), type = "n",
     xlab = "Number of Crimes per Quadrat (Red = Observed, Blue = Expected)",
     ylab = "Frequency of Occurrences")
points(QCountTable$Freq, col = "Red", type = "o", lwd = 3)
points(QCountTable$Expected, col = "Blue", type = "o", lwd = 3)

# Formal chi-squared quadrat test of CSR on the same 6 x 6 grid
teststats <- quadrat.test(crime_ppp, nx = 6, ny = 6)
teststats
## 
##  Chi-squared test of CSR using quadrat counts
##  Pearson X2 statistic
## 
## data:  crime_ppp
## X2 = 7350.5, df = 35, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 
## Quadrats: 6 by 6 grid of tiles
# Overlay the quadrat-test residuals on the point pattern.
# Fix: the title "Blue Plaques in Harrow" was a tutorial leftover; relabeled.
plot(crime_ppp, pch = 16, cex = 0.5, main = "Chicago Crimes: Quadrat Test")
plot(teststats, add = TRUE, col = "red")

Density-based spatial clustering of applications with noise (DBSCAN)

library(raster)
library(fpc)
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
# Drop exactly-duplicated points (spatstat warned about ties earlier)
all_pp <- unique(crime_ppp)

# Declare the coordinate unit.
# Fix: the coordinates are longitude/latitude in decimal degrees, not meters;
# labelling them "meter" made summary() report nonsense such as
# "points per square meter" over a 0.146 "square meter" window.
unitname(all_pp) <- c("degree", "degrees")
summary(all_pp)
## Marked planar point pattern:  4719 points
## Average intensity 32338.98 points per square meter
## 
## Coordinates are given to 6 decimal places
## 
## Multitype:
##                                   frequency   proportion  intensity
## ARSON                                    11 0.0023310020   75.38223
## ASSAULT                                 382 0.0809493500 2617.81900
## BATTERY                                 959 0.2032210000 6571.96000
## BURGLARY                                240 0.0508582300 1644.70300
## CONCEALED CARRY LICENSE VIOLATION         3 0.0006357279   20.55879
## CRIM SEXUAL ASSAULT                      25 0.0052977330  171.32330
## CRIMINAL DAMAGE                         544 0.1152787000 3727.99400
## CRIMINAL TRESPASS                       112 0.0237338400  767.52820
## DECEPTIVE PRACTICE                      255 0.0540368700 1747.49700
## HOMICIDE                                  5 0.0010595470   34.26465
## HUMAN TRAFFICKING                         1 0.0002119093    6.85293
## INTERFERENCE WITH PUBLIC OFFICER         23 0.0048739140  157.61740
## INTIMIDATION                              3 0.0006357279   20.55879
## KIDNAPPING                                2 0.0004238186   13.70586
## MOTOR VEHICLE THEFT                     189 0.0400508600 1295.20400
## NARCOTICS                               241 0.0510701400 1651.55600
## OBSCENITY                                 4 0.0008476372   27.41172
## OFFENSE INVOLVING CHILDREN               51 0.0108073700  349.49940
## OTHER OFFENSE                           278 0.0589107900 1905.11500
## PROSTITUTION                             16 0.0033905490  109.64690
## PUBLIC PEACE VIOLATION                   24 0.0050858230  164.47030
## ROBBERY                                 223 0.0472557700 1528.20300
## SEX OFFENSE                              18 0.0038143670  123.35270
## STALKING                                  3 0.0006357279   20.55879
## THEFT                                   989 0.2095783000 6777.54800
## WEAPONS VIOLATION                       118 0.0250053000  808.64580
## 
## Window: rectangle = [-87.91403, -87.5254] x [41.64704, 42.02252] meter
## Window area = 0.145923 square meter
## Unit of length: 1 meter
#we could subset the point pattern using the marks
# NOTE(review): "Tetramorium_caespitum" is an ant species name left over from a
# spatstat tutorial; it is not a level of the crime marks, so ant_pp is an EMPTY
# point pattern and everything computed from it is degenerate. Presumably a real
# crime type (e.g. "THEFT") was intended -- confirm and replace.
ant_pp <- subset(all_pp,marks=="Tetramorium_caespitum")
#in that case we do not need the marks any more
ant_pp <- unmark(ant_pp)
# Split the full pattern into one sub-pattern per crime type (mark level)
split_pp <- split(all_pp)
class(split_pp)
## [1] "splitppp" "ppplist"  "solist"   "list"
as.matrix(lapply(split_pp,npoints),ncol=1)
##                                   [,1]
## ARSON                             11  
## ASSAULT                           382 
## BATTERY                           959 
## BURGLARY                          240 
## CONCEALED CARRY LICENSE VIOLATION 3   
## CRIM SEXUAL ASSAULT               25  
## CRIMINAL DAMAGE                   544 
## CRIMINAL TRESPASS                 112 
## DECEPTIVE PRACTICE                255 
## HOMICIDE                          5   
## HUMAN TRAFFICKING                 1   
## INTERFERENCE WITH PUBLIC OFFICER  23  
## INTIMIDATION                      3   
## KIDNAPPING                        2   
## MOTOR VEHICLE THEFT               189 
## NARCOTICS                         241 
## OBSCENITY                         4   
## OFFENSE INVOLVING CHILDREN        51  
## OTHER OFFENSE                     278 
## PROSTITUTION                      16  
## PUBLIC PEACE VIOLATION            24  
## ROBBERY                           223 
## SEX OFFENSE                       18  
## STALKING                          3   
## THEFT                             989 
## WEAPONS VIOLATION                 118
# NOTE(review): a hexagon centred at (5, 5) cannot intersect the data, whose
# window is roughly [-87.9, -87.5] x [41.6, 42.0] (lon/lat) -- another tutorial
# leftover; the plots and split below are therefore empty. Confirm the intended
# window before relying on these results.
w <- hexagon(centre=c(5,5))
plot(ant_pp[w])

# Split the (empty) pattern by the hexagonal window
split_ant_pp <- split(ant_pp,f=w)
summary(split_ant_pp)
## Tile 1:
## Planar point pattern:  0 points
## Average intensity 0 points per square meter
## Window: polygonal boundary
## single connected closed polygon with 6 vertices
## enclosing rectangle: [4, 6] x [4.133975, 5.866025] meter
## Window area = 2.59808 square meter
## Unit of length: 1 meter
## Fraction of frame area: 0.75
## 
## Tile 2:
## Planar point pattern:  0 points
## Average intensity 0 points per square meter
## Window: rectangle = [-87.91403, -87.5254] x [41.64704, 42.02252] meter
## Window area = 0.145923 square meter
## Unit of length: 1 meter
# One kernel-density surface per crime type
dens_all <- density(split_pp)
plot(dens_all)

# Quadrat test on ant_pp (default 5 x 5 grid).
# NOTE(review): ant_pp was built by subsetting on a mark value that does not
# occur in the crime data, so this tests an empty/degenerate pattern -- the
# small-expected-counts warning in the output reflects that; verify the input.
quadrat.test(ant_pp)
## Warning: Some expected counts are small; chi^2 approximation may be
## inaccurate
## 
##  Chi-squared test of CSR using quadrat counts
##  Pearson X2 statistic
## 
## data:  ant_pp
## X2 = 2880, df = 24, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 
## Quadrats: 5 by 5 grid of tiles
# Fit an intercept-only (stationary) Poisson point process model to the full
# marked pattern: one uniform intensity per mark level
m0 <- ppm(all_pp ~ 1)
m0
## Stationary multitype Poisson process
## 
## Possible marks: 'ARSON', 'ASSAULT', 'BATTERY', 'BURGLARY', 'CONCEALED 
## CARRY LICENSE VIOLATION', 'CRIM SEXUAL ASSAULT', 'CRIMINAL DAMAGE', 
## 'CRIMINAL TRESPASS', 'DECEPTIVE PRACTICE', 'HOMICIDE', 'HUMAN 
## TRAFFICKING', 'INTERFERENCE WITH PUBLIC OFFICER', 'INTIMIDATION', 
## 'KIDNAPPING', 'MOTOR VEHICLE THEFT', 'NARCOTICS', 'OBSCENITY', 'OFFENSE 
## INVOLVING CHILDREN', 'OTHER OFFENSE', 'PROSTITUTION', 'PUBLIC PEACE 
## VIOLATION', 'ROBBERY', 'SEX OFFENSE', 'STALKING', 'THEFT' and 'WEAPONS 
## VIOLATION'
## 
## Uniform intensity for each mark level:   1243.807
## 
##             Estimate      S.E.  CI95.lo  CI95.hi Ztest     Zval
## log(lambda) 7.125932 0.0145571 7.097401 7.154463   *** 489.5157
# Fit a log-quadratic spatial trend in (x, y).
# NOTE(review): the knitted output shows "system is computationally singular" and
# an unusable variance matrix -- ant_pp comes from subsetting on a mark value that
# does not occur in the crime data, so this fit runs on an empty/degenerate
# pattern. Confirm the intended point pattern before interpreting m1.
m1 <- ppm(ant_pp ~ polynom(x,y,2))
m1
## Error in solve.default(M) : 
##   system is computationally singular: reciprocal condition number = 1.65644e-20
## Warning: Cannot compute variance: Fisher information matrix is singular
## Error in solve.default(M) : 
##   system is computationally singular: reciprocal condition number = 1.65644e-20
## Warning: Cannot compute variance: Fisher information matrix is singular
## Nonstationary Poisson process
## 
## Log intensity:  ~x + y + I(x^2) + I(x * y) + I(y^2)
## 
## Fitted trend coefficients:
##   (Intercept)             x             y        I(x^2)      I(x * y) 
## -2.030259e+01  5.660456e-11  3.555169e-12  4.367850e-13  4.778327e-13 
##        I(y^2) 
##  4.580579e-13 
## 
## Standard errors unavailable; variance-covariance matrix is singular

Based on the results of these tests we will be able to determine whether or not our data is clustered. If our data is clustered, we will interpret the clustering as occurring over the different types of crime and, where possible, connect it to gang activity — relating crimes committed by gangs to crime in Chicago and quantifying that relationship through Hierarchical Based Crime Series Linkage.